{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Unnamed: 0</th>\n",
       "      <th>chr</th>\n",
       "      <th>source</th>\n",
       "      <th>type</th>\n",
       "      <th>start</th>\n",
       "      <th>end</th>\n",
       "      <th>score</th>\n",
       "      <th>strand</th>\n",
       "      <th>phase</th>\n",
       "      <th>attr</th>\n",
       "      <th>ID</th>\n",
       "      <th>Symbol</th>\n",
       "      <th>promoter_start</th>\n",
       "      <th>promoter_end</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>661</td>\n",
       "      <td>chr1</td>\n",
       "      <td>HAVANA</td>\n",
       "      <td>gene</td>\n",
       "      <td>944204.0</td>\n",
       "      <td>959309.0</td>\n",
       "      <td>.</td>\n",
       "      <td>-</td>\n",
       "      <td>.</td>\n",
       "      <td>ID=ENSG00000188976.10;gene_id=ENSG00000188976....</td>\n",
       "      <td>ENSG00000188976</td>\n",
       "      <td>NOC2L</td>\n",
       "      <td>958809.0</td>\n",
       "      <td>959809.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>705</td>\n",
       "      <td>chr1</td>\n",
       "      <td>HAVANA</td>\n",
       "      <td>gene</td>\n",
       "      <td>960587.0</td>\n",
       "      <td>965715.0</td>\n",
       "      <td>.</td>\n",
       "      <td>+</td>\n",
       "      <td>.</td>\n",
       "      <td>ID=ENSG00000187961.13;gene_id=ENSG00000187961....</td>\n",
       "      <td>ENSG00000187961</td>\n",
       "      <td>KLHL17</td>\n",
       "      <td>960087.0</td>\n",
       "      <td>961087.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>895</td>\n",
       "      <td>chr1</td>\n",
       "      <td>HAVANA</td>\n",
       "      <td>gene</td>\n",
       "      <td>998962.0</td>\n",
       "      <td>1000172.0</td>\n",
       "      <td>.</td>\n",
       "      <td>-</td>\n",
       "      <td>.</td>\n",
       "      <td>ID=ENSG00000188290.10;gene_id=ENSG00000188290....</td>\n",
       "      <td>ENSG00000188290</td>\n",
       "      <td>HES4</td>\n",
       "      <td>999672.0</td>\n",
       "      <td>1000672.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>931</td>\n",
       "      <td>chr1</td>\n",
       "      <td>HAVANA</td>\n",
       "      <td>gene</td>\n",
       "      <td>1001138.0</td>\n",
       "      <td>1014541.0</td>\n",
       "      <td>.</td>\n",
       "      <td>+</td>\n",
       "      <td>.</td>\n",
       "      <td>ID=ENSG00000187608.8;gene_id=ENSG00000187608.8...</td>\n",
       "      <td>ENSG00000187608</td>\n",
       "      <td>ISG15</td>\n",
       "      <td>1000638.0</td>\n",
       "      <td>1001638.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>959</td>\n",
       "      <td>chr1</td>\n",
       "      <td>HAVANA</td>\n",
       "      <td>gene</td>\n",
       "      <td>1020123.0</td>\n",
       "      <td>1056118.0</td>\n",
       "      <td>.</td>\n",
       "      <td>+</td>\n",
       "      <td>.</td>\n",
       "      <td>ID=ENSG00000188157.14;gene_id=ENSG00000188157....</td>\n",
       "      <td>ENSG00000188157</td>\n",
       "      <td>AGRN</td>\n",
       "      <td>1019623.0</td>\n",
       "      <td>1020623.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   Unnamed: 0   chr  source  type      start        end score strand phase  \\\n",
       "0         661  chr1  HAVANA  gene   944204.0   959309.0     .      -     .   \n",
       "1         705  chr1  HAVANA  gene   960587.0   965715.0     .      +     .   \n",
       "2         895  chr1  HAVANA  gene   998962.0  1000172.0     .      -     .   \n",
       "3         931  chr1  HAVANA  gene  1001138.0  1014541.0     .      +     .   \n",
       "4         959  chr1  HAVANA  gene  1020123.0  1056118.0     .      +     .   \n",
       "\n",
       "                                                attr               ID  Symbol  \\\n",
       "0  ID=ENSG00000188976.10;gene_id=ENSG00000188976....  ENSG00000188976   NOC2L   \n",
       "1  ID=ENSG00000187961.13;gene_id=ENSG00000187961....  ENSG00000187961  KLHL17   \n",
       "2  ID=ENSG00000188290.10;gene_id=ENSG00000188290....  ENSG00000188290    HES4   \n",
       "3  ID=ENSG00000187608.8;gene_id=ENSG00000187608.8...  ENSG00000187608   ISG15   \n",
       "4  ID=ENSG00000188157.14;gene_id=ENSG00000188157....  ENSG00000188157    AGRN   \n",
       "\n",
       "   promoter_start  promoter_end  \n",
       "0        958809.0      959809.0  \n",
       "1        960087.0      961087.0  \n",
       "2        999672.0     1000672.0  \n",
       "3       1000638.0     1001638.0  \n",
       "4       1019623.0     1020623.0  "
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import pandas as pd\n",
    "annotation = pd.read_csv('../../data/pancancer/TCGA/methylation/annotation_with_promoters_1000bp_basic.tsv', sep='\\t')\n",
    "annotation.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "bed_f = annotation[['chr', 'promoter_start', 'promoter_end', 'Symbol']]\n",
    "bed_f.columns = ['chr', 'start', 'end', 'Symbol']\n",
    "bed_f.to_csv('../../data/pancancer/TCGA/methylation/promoter_only_track_1000bp_basic.bed', sep='\\t', header=None, index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>chr</th>\n",
       "      <th>start</th>\n",
       "      <th>end</th>\n",
       "      <th>Symbol</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>chr1</td>\n",
       "      <td>959059.0</td>\n",
       "      <td>959559.0</td>\n",
       "      <td>NOC2L</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>chr1</td>\n",
       "      <td>960337.0</td>\n",
       "      <td>960837.0</td>\n",
       "      <td>KLHL17</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>chr1</td>\n",
       "      <td>999922.0</td>\n",
       "      <td>1000422.0</td>\n",
       "      <td>HES4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>chr1</td>\n",
       "      <td>1000888.0</td>\n",
       "      <td>1001388.0</td>\n",
       "      <td>ISG15</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>chr1</td>\n",
       "      <td>1019873.0</td>\n",
       "      <td>1020373.0</td>\n",
       "      <td>AGRN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    chr      start        end  Symbol\n",
       "0  chr1   959059.0   959559.0   NOC2L\n",
       "1  chr1   960337.0   960837.0  KLHL17\n",
       "2  chr1   999922.0  1000422.0    HES4\n",
       "3  chr1  1000888.0  1001388.0   ISG15\n",
       "4  chr1  1019873.0  1020373.0    AGRN"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "bed_f.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Unnamed: 0</th>\n",
       "      <th>Composite Element REF</th>\n",
       "      <th>Beta_value_sum</th>\n",
       "      <th>Chromosome</th>\n",
       "      <th>Start</th>\n",
       "      <th>End</th>\n",
       "      <th>Gene_Symbol</th>\n",
       "      <th>Gene_Type</th>\n",
       "      <th>Transcript_ID</th>\n",
       "      <th>Position_to_TSS</th>\n",
       "      <th>CGI_Coordinate</th>\n",
       "      <th>Feature_Type</th>\n",
       "      <th>Beta_value_mean</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>cg00000029</td>\n",
       "      <td>1305.707632</td>\n",
       "      <td>chr16</td>\n",
       "      <td>53434200</td>\n",
       "      <td>53434201</td>\n",
       "      <td>RBL2;RBL2;RBL2</td>\n",
       "      <td>protein_coding;protein_coding;protein_coding</td>\n",
       "      <td>ENST00000262133.9;ENST00000544405.5;ENST000005...</td>\n",
       "      <td>-221;-1420;222</td>\n",
       "      <td>CGI:chr16:53434489-53435297</td>\n",
       "      <td>N_Shore</td>\n",
       "      <td>0.286026</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>cg00000108</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>chr3</td>\n",
       "      <td>37417715</td>\n",
       "      <td>37417716</td>\n",
       "      <td>C3orf35;C3orf35;C3orf35;C3orf35;C3orf35;C3orf3...</td>\n",
       "      <td>lincRNA;lincRNA;lincRNA;lincRNA;lincRNA;lincRN...</td>\n",
       "      <td>ENST00000328376.8;ENST00000332506.6;ENST000004...</td>\n",
       "      <td>18552;18552;6505;31445;18143;447;18552;18552</td>\n",
       "      <td>CGI:chr3:37451927-37453047</td>\n",
       "      <td>.</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2</td>\n",
       "      <td>cg00000109</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>chr3</td>\n",
       "      <td>172198247</td>\n",
       "      <td>172198248</td>\n",
       "      <td>FNDC3B;FNDC3B;FNDC3B;FNDC3B;FNDC3B;FNDC3B</td>\n",
       "      <td>protein_coding;protein_coding;protein_coding;p...</td>\n",
       "      <td>ENST00000336824.7;ENST00000415807.5;ENST000004...</td>\n",
       "      <td>157692;158618;151333;71272;158587;71273</td>\n",
       "      <td>CGI:chr3:172039703-172040934</td>\n",
       "      <td>.</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>3</td>\n",
       "      <td>cg00000165</td>\n",
       "      <td>1931.894856</td>\n",
       "      <td>chr1</td>\n",
       "      <td>90729117</td>\n",
       "      <td>90729118</td>\n",
       "      <td>.</td>\n",
       "      <td>.</td>\n",
       "      <td>.</td>\n",
       "      <td>.</td>\n",
       "      <td>CGI:chr1:90724932-90727247</td>\n",
       "      <td>S_Shore</td>\n",
       "      <td>0.423197</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>4</td>\n",
       "      <td>cg00000236</td>\n",
       "      <td>4043.750654</td>\n",
       "      <td>chr8</td>\n",
       "      <td>42405776</td>\n",
       "      <td>42405777</td>\n",
       "      <td>VDAC3</td>\n",
       "      <td>protein_coding</td>\n",
       "      <td>ENST00000022615.7</td>\n",
       "      <td>13872</td>\n",
       "      <td>CGI:chr8:42410918-42411241</td>\n",
       "      <td>.</td>\n",
       "      <td>0.885816</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   Unnamed: 0 Composite Element REF  Beta_value_sum Chromosome      Start  \\\n",
       "0           0            cg00000029     1305.707632      chr16   53434200   \n",
       "1           1            cg00000108        0.000000       chr3   37417715   \n",
       "2           2            cg00000109        0.000000       chr3  172198247   \n",
       "3           3            cg00000165     1931.894856       chr1   90729117   \n",
       "4           4            cg00000236     4043.750654       chr8   42405776   \n",
       "\n",
       "         End                                        Gene_Symbol  \\\n",
       "0   53434201                                     RBL2;RBL2;RBL2   \n",
       "1   37417716  C3orf35;C3orf35;C3orf35;C3orf35;C3orf35;C3orf3...   \n",
       "2  172198248          FNDC3B;FNDC3B;FNDC3B;FNDC3B;FNDC3B;FNDC3B   \n",
       "3   90729118                                                  .   \n",
       "4   42405777                                              VDAC3   \n",
       "\n",
       "                                           Gene_Type  \\\n",
       "0       protein_coding;protein_coding;protein_coding   \n",
       "1  lincRNA;lincRNA;lincRNA;lincRNA;lincRNA;lincRN...   \n",
       "2  protein_coding;protein_coding;protein_coding;p...   \n",
       "3                                                  .   \n",
       "4                                     protein_coding   \n",
       "\n",
       "                                       Transcript_ID  \\\n",
       "0  ENST00000262133.9;ENST00000544405.5;ENST000005...   \n",
       "1  ENST00000328376.8;ENST00000332506.6;ENST000004...   \n",
       "2  ENST00000336824.7;ENST00000415807.5;ENST000004...   \n",
       "3                                                  .   \n",
       "4                                  ENST00000022615.7   \n",
       "\n",
       "                                Position_to_TSS                CGI_Coordinate  \\\n",
       "0                                -221;-1420;222   CGI:chr16:53434489-53435297   \n",
       "1  18552;18552;6505;31445;18143;447;18552;18552    CGI:chr3:37451927-37453047   \n",
       "2       157692;158618;151333;71272;158587;71273  CGI:chr3:172039703-172040934   \n",
       "3                                             .    CGI:chr1:90724932-90727247   \n",
       "4                                         13872    CGI:chr8:42410918-42411241   \n",
       "\n",
       "  Feature_Type  Beta_value_mean  \n",
       "0      N_Shore         0.286026  \n",
       "1            .         0.000000  \n",
       "2            .         0.000000  \n",
       "3      S_Shore         0.423197  \n",
       "4            .         0.885816  "
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "\"\"\"\n",
    "meth_df = pd.read_csv('../../data/pancancer/TCGA/methylation/download/99b667be-e75c-458b-ba62-9931523b9504/jhu-usc.edu_UCEC.HumanMethylation450.23.lvl-3.TCGA-2E-A9G8-01A-11D-A409-05.gdc_hg38.txt',\n",
    "                      sep='\\t')\n",
    "\"\"\"\n",
    "meth_df = pd.read_csv('../../data/pancancer/TCGA/methylation/mean_450k_methylation_profile.tsv', sep='\\t')\n",
    "meth_df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "meth_bed = meth_df[['Chromosome', 'Start', 'End', 'Beta_value_mean']]\n",
    "meth_bed = meth_bed[~meth_bed.Chromosome.str.startswith('*')]\n",
    "meth_bed.Beta_value_mean.fillna(0, inplace=True)\n",
    "meth_bed.to_csv('../../data/pancancer/TCGA/methylation/mean_meth_450k_track.bedGraph', sep='\\t', header=None, index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(480457, 4)"
      ]
     },
     "execution_count": 33,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "meth_bed[~meth_bed.Chromosome.str.startswith('*')].shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Composite Element REF</th>\n",
       "      <th>Beta_value</th>\n",
       "      <th>Chromosome</th>\n",
       "      <th>Start</th>\n",
       "      <th>End</th>\n",
       "      <th>Gene_Symbol</th>\n",
       "      <th>Gene_Type</th>\n",
       "      <th>Transcript_ID</th>\n",
       "      <th>Position_to_TSS</th>\n",
       "      <th>CGI_Coordinate</th>\n",
       "      <th>Feature_Type</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>cg00000292</td>\n",
       "      <td>0.478926</td>\n",
       "      <td>chr16</td>\n",
       "      <td>28878779</td>\n",
       "      <td>28878780</td>\n",
       "      <td>ATP2A1;ATP2A1;ATP2A1;ATP2A1;ATP2A1</td>\n",
       "      <td>protein_coding;protein_coding;protein_coding;p...</td>\n",
       "      <td>ENST00000357084.6;ENST00000395503.7;ENST000005...</td>\n",
       "      <td>373;290;-1275;-465;-83</td>\n",
       "      <td>CGI:chr16:28879633-28880547</td>\n",
       "      <td>N_Shore</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>cg00002426</td>\n",
       "      <td>0.251826</td>\n",
       "      <td>chr3</td>\n",
       "      <td>57757816</td>\n",
       "      <td>57757817</td>\n",
       "      <td>SLMAP;SLMAP;SLMAP;SLMAP;SLMAP;SLMAP</td>\n",
       "      <td>protein_coding;protein_coding;protein_coding;p...</td>\n",
       "      <td>ENST00000295951.6;ENST00000295952.6;ENST000003...</td>\n",
       "      <td>1585;368;261;257;257;514</td>\n",
       "      <td>CGI:chr3:57756198-57757263</td>\n",
       "      <td>S_Shore</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>cg00003994</td>\n",
       "      <td>0.078171</td>\n",
       "      <td>chr7</td>\n",
       "      <td>15686237</td>\n",
       "      <td>15686238</td>\n",
       "      <td>MEOX2</td>\n",
       "      <td>protein_coding</td>\n",
       "      <td>ENST00000262041.5</td>\n",
       "      <td>576</td>\n",
       "      <td>CGI:chr7:16399497-16399700</td>\n",
       "      <td>.</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>cg00005847</td>\n",
       "      <td>0.833416</td>\n",
       "      <td>chr2</td>\n",
       "      <td>176164345</td>\n",
       "      <td>176164346</td>\n",
       "      <td>AC009336.19;HOXD3;HOXD3;HOXD3;RP11-387A1.5</td>\n",
       "      <td>protein_coding;protein_coding;protein_coding;p...</td>\n",
       "      <td>ENST00000468418.4;ENST00000249440.4;ENST000004...</td>\n",
       "      <td>13259;267;3453;27387;1372</td>\n",
       "      <td>CGI:chr2:176164685-176165509</td>\n",
       "      <td>N_Shore</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>cg00006414</td>\n",
       "      <td>NaN</td>\n",
       "      <td>chr7</td>\n",
       "      <td>149125745</td>\n",
       "      <td>149125746</td>\n",
       "      <td>RN7SL521P;ZNF398;ZNF425;ZNF425</td>\n",
       "      <td>misc_RNA;protein_coding;protein_coding;protein...</td>\n",
       "      <td>ENST00000488398.3;ENST00000426851.5;ENST000003...</td>\n",
       "      <td>242;-672;602;562</td>\n",
       "      <td>CGI:chr7:149126122-149127136</td>\n",
       "      <td>N_Shore</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  Composite Element REF  Beta_value Chromosome      Start        End  \\\n",
       "0            cg00000292    0.478926      chr16   28878779   28878780   \n",
       "1            cg00002426    0.251826       chr3   57757816   57757817   \n",
       "2            cg00003994    0.078171       chr7   15686237   15686238   \n",
       "3            cg00005847    0.833416       chr2  176164345  176164346   \n",
       "4            cg00006414         NaN       chr7  149125745  149125746   \n",
       "\n",
       "                                  Gene_Symbol  \\\n",
       "0          ATP2A1;ATP2A1;ATP2A1;ATP2A1;ATP2A1   \n",
       "1         SLMAP;SLMAP;SLMAP;SLMAP;SLMAP;SLMAP   \n",
       "2                                       MEOX2   \n",
       "3  AC009336.19;HOXD3;HOXD3;HOXD3;RP11-387A1.5   \n",
       "4              RN7SL521P;ZNF398;ZNF425;ZNF425   \n",
       "\n",
       "                                           Gene_Type  \\\n",
       "0  protein_coding;protein_coding;protein_coding;p...   \n",
       "1  protein_coding;protein_coding;protein_coding;p...   \n",
       "2                                     protein_coding   \n",
       "3  protein_coding;protein_coding;protein_coding;p...   \n",
       "4  misc_RNA;protein_coding;protein_coding;protein...   \n",
       "\n",
       "                                       Transcript_ID  \\\n",
       "0  ENST00000357084.6;ENST00000395503.7;ENST000005...   \n",
       "1  ENST00000295951.6;ENST00000295952.6;ENST000003...   \n",
       "2                                  ENST00000262041.5   \n",
       "3  ENST00000468418.4;ENST00000249440.4;ENST000004...   \n",
       "4  ENST00000488398.3;ENST00000426851.5;ENST000003...   \n",
       "\n",
       "             Position_to_TSS                CGI_Coordinate Feature_Type  \n",
       "0     373;290;-1275;-465;-83   CGI:chr16:28879633-28880547      N_Shore  \n",
       "1   1585;368;261;257;257;514    CGI:chr3:57756198-57757263      S_Shore  \n",
       "2                        576    CGI:chr7:16399497-16399700            .  \n",
       "3  13259;267;3453;27387;1372  CGI:chr2:176164685-176165509      N_Shore  \n",
       "4           242;-672;602;562  CGI:chr7:149126122-149127136      N_Shore  "
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "meth_df = pd.read_csv('../../data/pancancer/TCGA/methylation/download/ffc1e10c-1e6a-401c-9e1e-bf102da44d98/jhu-usc.edu_GBM.HumanMethylation27.3.lvl-3.TCGA-15-1447-01A-01D-0521-05.gdc_hg38.txt', sep='\\t')\n",
    "meth_df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "meth_bed = meth_df[['Chromosome', 'Start', 'End', 'Beta_value']]\n",
    "meth_bed = meth_bed[~meth_bed.Chromosome.str.startswith('*')]\n",
    "meth_bed.Beta_value.fillna(0, inplace=True)\n",
    "meth_bed.to_csv('../../data/pancancer/TCGA/methylation/meth_27k_GBM.bedGraph', sep='\\t', header=None, index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.1"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
